1. Load libraries

x<-c("car", "lme4", "ggplot2", "dplyr", "tibble", "stringr", 'fitdistrplus', 'sjPlot', "condformat", 'kableExtra')
lapply(x, require, character.only = TRUE)

2. Load data

GC<-read.csv("GCunmatched.csv", h=T)
NEO<-read.csv("NEOunmatched.csv", h=T)
GC_Behavior<-read.csv("GC_behavior_matched.csv")
TL_NEO<-read.csv("Time-lagged_NEO.csv", h=T)
Concurrent_NEO<-read.csv("Concurrent_NEO.csv", h=T)

3. Test for normal distribution in response variables

Response variable: fGC

par(mfrow=c(1,2)) 
hist(GC$final.conc, breaks=100)
hist(log(GC$final.conc),breaks=100)  

ln transformed fGC more normally distributed

descdist(log(GC$final.conc), boot= 500)

## summary statistics
## ------
## min:  4.817051   max:  8.023356 
## median:  6.507725 
## mean:  6.539997 
## estimated sd:  0.5080615 
## estimated skewness:  0.04898005 
## estimated kurtosis:  3.512847
plotdist(log(GC$final.conc), histo=TRUE, demp=TRUE)

Response variable: uNEO

par(mfrow=c(1,2)) 
hist(NEO$NEO.CR, breaks=100)
hist((log(NEO$NEO.CR)), breaks=100)

ln transformed NEO more normally distributed

descdist(log(NEO$NEO.CR), boot= 500)

## summary statistics
## ------
## min:  3.659753   max:  6.855729 
## median:  5.42101 
## mean:  5.430162 
## estimated sd:  0.4942345 
## estimated skewness:  0.1014 
## estimated kurtosis:  2.873707
plotdist(log(NEO$NEO.CR), histo=TRUE, demp=TRUE)

4. Check for homoscedascity (residuals vs fitted values)

Linear models response variable: ln(GC)

GC.LM1=lm(log(GC.final.conc)~ FreqAllProx + RelRank, data=GC_Behavior)
GC.LM2=lm(log(GC.final.conc)~ FreqTotalGR  + RelRank, data=GC_Behavior)
GC.LM3=lm(log(GC.final.conc)~ FreqGRRec + RelRank, data=GC_Behavior)
GC.LM4=lm(log(GC.final.conc)~ FreqGRGiven + RelRank, data=GC_Behavior)
GC.LM5=lm(log(GC.final.conc)~ FreqTimeSpentCop + RelRank, data=GC_Behavior)
GC.LM6=lm(log(GC.final.conc)~ FreqTimeSpentConsort + RelRank, data=GC_Behavior)
par(mfrow = c(2,2))
GC.LM1=lm(log(GC.final.conc)~ FreqAllProx + RelRank, data=GC_Behavior)
plot(GC.LM1)

GC.LM2=lm(log(GC.final.conc)~ FreqTotalGR  + RelRank, data=GC_Behavior)
plot(GC.LM2)

GC.LM3=lm(log(GC.final.conc)~ FreqGRRec + RelRank, data=GC_Behavior)
plot(GC.LM3)

GC.LM4=lm(log(GC.final.conc)~ FreqGRGiven + RelRank, data=GC_Behavior)
plot(GC.LM4)

GC.LM5=lm(log(GC.final.conc)~ FreqTimeSpentCop + RelRank, data=GC_Behavior)
plot(GC.LM5)

GC.LM6=lm(log(GC.final.conc)~ FreqTimeSpentConsort + RelRank, data=GC_Behavior)
plot(GC.LM6)

points appear evenly disbursed around 0

Linear models response variable: time-lagged ln(NEO.CR)

NEO.TL.LM1=lm(log(NEO.CR)~ FreqAllProx  + RelRank, data=TL_NEO)
NEO.TL.LM2=lm(log(NEO.CR)~ FreqTotalGR + RelRank, data=TL_NEO)
NEO.TL.LM3=lm(log(NEO.CR)~ FreqGRRec + RelRank, data=TL_NEO)
NEO.TL.LM4=lm(log(NEO.CR)~ FreqGRGiven + RelRank, data=TL_NEO)
NEO.TL.LM5=lm(log(NEO.CR)~ FreqTimeSpentCop + RelRank, data=TL_NEO)
NEO.TL.LM6=lm(log(NEO.CR)~ FreqTimeSpentConsort + RelRank, data=TL_NEO)
par(mfrow = c(2,2))
NEO.TL.LM1=lm(log(NEO.CR)~ FreqAllProx  + RelRank, data=TL_NEO)
plot(NEO.TL.LM1)

NEO.TL.LM2=lm(log(NEO.CR)~ FreqTotalGR + RelRank, data=TL_NEO)
plot(NEO.TL.LM2)

NEO.TL.LM3=lm(log(NEO.CR)~ FreqGRRec + RelRank, data=TL_NEO)
plot(NEO.TL.LM3)

NEO.TL.LM4=lm(log(NEO.CR)~ FreqGRGiven + RelRank, data=TL_NEO)
plot(NEO.TL.LM4)

NEO.TL.LM5=lm(log(NEO.CR)~ FreqTimeSpentCop + RelRank, data=TL_NEO)
plot(NEO.TL.LM5)

NEO.TL.LM6=lm(log(NEO.CR)~ FreqTimeSpentConsort + RelRank, data=TL_NEO)
plot(NEO.TL.LM6)

points appear evenly disbursed around 0

Linear models response variable: Concurrent ln(NEO.CR)

NEO.Con.LM1=lm(log(NEO.CR)~ FreqAllProx  + RelRank, data=Concurrent_NEO)
NEO.Con.LM2=lm(log(NEO.CR)~ FreqTotalGR + RelRank, data=Concurrent_NEO)
NEO.Con.LM3=lm(log(NEO.CR)~ FreqGRRec + RelRank, data=Concurrent_NEO)
NEO.Con.LM4=lm(log(NEO.CR)~ FreqGRGiven + RelRank, data=Concurrent_NEO)
NEO.Con.LM5=lm(log(NEO.CR)~ FreqTimeSpentCop + RelRank, data=Concurrent_NEO)
NEO.Con.LM6=lm(log(NEO.CR)~ FreqTimeSpentConsort + RelRank, data=Concurrent_NEO)
par(mfrow = c(2,2))
NEO.Con.LM1=lm(log(NEO.CR)~ FreqAllProx  + RelRank, data=Concurrent_NEO)
plot(NEO.Con.LM1)

NEO.Con.LM2=lm(log(NEO.CR)~ FreqTotalGR + RelRank, data=Concurrent_NEO)
plot(NEO.Con.LM2)

NEO.Con.LM3=lm(log(NEO.CR)~ FreqGRRec + RelRank, data=Concurrent_NEO)
plot(NEO.Con.LM3)

NEO.Con.LM4=lm(log(NEO.CR)~ FreqGRGiven + RelRank, data=Concurrent_NEO)
plot(NEO.Con.LM4)

NEO.Con.LM5=lm(log(NEO.CR)~ FreqTimeSpentCop + RelRank, data=Concurrent_NEO)
plot(NEO.Con.LM5)

NEO.Con.LM6=lm(log(NEO.CR)~ FreqTimeSpentConsort + RelRank, data=Concurrent_NEO)
plot(NEO.Con.LM6)

residuals appear evenly disbursed around 0

5. CHECK FOR COLLINEARITY

VIFs for GC models

GC_Linear_model Independent_Variable VIF
model 1 FreqAllProx 1.070511
model 2 FreqTotalGR 1.020731
model 3 FreqGRRec 1.016083
model 4 FreqGRGiven 1.006063
model 5 FreqTimeSpentCop 1.053784
model 6 FreqTimeSpentConsort 1.019914

All VIFs < 2

VIFs for Time-lagged NEO models

NEO.TL_Linear_model Independent_Variable VIF
model 1 FreqAllProx 1.068882
model 2 FreqTotalGR 1.002498
model 3 FreqGRRec 1.004169
model 4 FreqGRGiven 1.000119
model 5 FreqTimeSpentCop 1.031555
model 6 FreqTimeSpentConsort 1.049770

All VIFs < 2

VIFs for Concurrent NEO models

NEO.Con_Linear_model Independent_Variable VIF
model 1 FreqAllProx 1.067799
model 2 FreqTotalGR 1.000007
model 3 FreqGRRec 1.000234
model 4 FreqGRGiven 1.000196
model 5 FreqTimeSpentCop 1.026227
model 6 FreqTimeSpentConsort 1.051667

All VIFs < 2